Dependencies

library(patchwork)
library(tidyverse)
library(knitr)
library(kableExtra)
library(janitor)
library(scales)
library(RColorBrewer)
library(ggplot2)
library(ggridges)
library(ggrain)
library(gridExtra)
library(psych)
library(ggExtra)
library(ggrepel)  
library(ggdist)
library(report)
library(ggmagnify)
library(crayon)

Data

Load the processed data and apply the global exclusions.

data_processed <- read_csv("../data/processed/data_processed.csv")

data_processed_after_exclusions <- data_processed |>
  filter(exclude_participant == "include")

Sample descriptives

Sample size before exclusions

data_processed
## # A tibble: 102 × 15
##      subject date       time     age                gender  like positive prefer
##        <dbl> <date>     <time>   <chr>              <chr>  <dbl>    <dbl>  <dbl>
##  1 548957868 2022-06-23 10:46:30 23                 other…    NA       NA     NA
##  2 504546409 2022-06-23 11:54:55 other/missing/err… other…    NA       NA     NA
##  3 994692692 2022-06-23 12:23:32 21                 female     2        1      1
##  4 246532124 2022-06-23 12:27:58 23                 male       4       NA     NA
##  5 246532124 2022-06-23 12:32:30 23                 male       4       NA     NA
##  6 902223169 2022-06-23 12:33:36 19                 female     3        3      2
##  7 469672453 2022-06-23 12:35:04 27                 male       2        1      1
##  8 637089700 2022-06-23 13:35:40 43                 male       2        2      2
##  9   4345805 2022-06-23 12:37:12 54                 male       2        4      3
## 10 786441188 2022-06-23 13:37:07 27                 female     1        1      3
## # ℹ 92 more rows
## # ℹ 7 more variables: mean_evaluation <dbl>, AMP_score <dbl>,
## #   proportion_fast_trials_amp <dbl>, exclude_amp_performance <chr>,
## #   exclude_amp_completeness <chr>, exclude_duplicate_data <chr>,
## #   exclude_participant <chr>
data_processed |>
  count(name = "n") |>
  kable() |>
  add_header_above(header = c("Whole sample" = 1)) |> # note that you can add header rows to tables like this. The "1" indicates the number of columns the header should span. The sum of these numbers must equal the number of columns or you'll get an error.
  kable_classic(full_width = FALSE)
Whole sample
n
102

Sample size after exclusions

Sample used in subsequent analyses

data_processed_after_exclusions |>
  count(name = "n") |>
  kable() |>
  add_header_above(header = c("For analysis" = 1)) |>
  kable_classic(full_width = FALSE)
For analysis
n
90

Age

data_processed_after_exclusions |>
  mutate(age = as.numeric(age)) |>
  summarise(Mean = mean(age, na.rm = TRUE),
            SD = sd(age, na.rm = TRUE)) |>
  mutate_all(.funs = janitor::round_half_up, digits = 1) |>
  kable() |>
  add_header_above(header = c("Age" = 2)) |>
  kable_classic(full_width = FALSE)
Age
Mean SD
36.8 12.4

Gender

data_processed_after_exclusions |> 
  rename(Gender = gender) |>
  group_by(Gender) |> 
  summarise(n = n()) |> 
  mutate(Percent = paste0(round_half_up((n / sum(n)) * 100, 1), "%")) |>
  mutate(Gender = stringr::str_to_sentence(Gender)) |> # Change the case of the Gender variable so that it prints nicely
  kable() |>
  kable_classic(full_width = FALSE)
Gender n Percent
Female 39 43.3%
Male 48 53.3%
Nonbinary 3 3.3%

Descriptives

Descriptive statistics and plots of the measures (excluding the demographics variables)

Self-reported evaluations

Descriptive stats

# overall self-reported evaluations
dat_mean_ratings <- data_processed_after_exclusions |>
  summarise(Mean = mean(mean_evaluation, na.rm = TRUE),
            SD = sd(mean_evaluation, na.rm = TRUE),
            n = n()) |>
  mutate(group = "Full sample")

# self-reported evaluations by gender category
dat_mean_ratings_by_gender <- data_processed_after_exclusions |>
  group_by(group = gender) |>
  summarise(Mean = mean(mean_evaluation, na.rm = TRUE),
            SD = sd(mean_evaluation, na.rm = TRUE),
            n = n())

# combine both into one table
bind_rows(dat_mean_ratings,
          dat_mean_ratings_by_gender) |>
  select(Subset = group, Mean, SD, n) |> # select variables of interest, and rename one 
  mutate(Subset = stringr::str_to_sentence(Subset)) |> # Change the case of the Subset variable so that it prints nicely
  mutate_if(is.numeric, round_half_up, digits = 2) |>
  kable() |>
  add_header_above(header = c(" " = 1, "Self-reported evaluations" = 3)) |>
  kable_classic(full_width = FALSE)
Self-reported evaluations
Subset Mean SD n
Full sample 1.64 1.02 90
Female 1.32 0.71 39
Male 1.81 1.03 48
Nonbinary 3.33 2.19 3

Descriptive plot

ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) +
  geom_histogram(binwidth = 1,
                 boundary = 0,
                 fill = viridis_pal(begin = 0.45, option = "mako")(1), 
                 color = viridis_pal(begin = 0.30, option = "mako")(1)) + 
  xlab("Mean self-reported evaluation") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 7)) +
  coord_cartesian(xlim = c(1, 7)) +
  theme(panel.grid.minor = element_blank())

AMP evaluations

Descriptive stats

add table of means, SDs, Ns

Descriptive plots

ggplot(data_processed_after_exclusions, aes(x = AMP_score)) +
  geom_histogram(binwidth = 0.05,
                 boundary = 0,
                 fill = viridis_pal(begin = 0.45, option = "mako")(1), 
                 color = viridis_pal(begin = 0.30, option = "mako")(1)) + 
  xlab("AMP score") +
  ylab("Frequency") +
  theme_linedraw() +
  scale_x_continuous(breaks = pretty_breaks(n = 10))

Analyses & hypothesis tests

Self-reported evaluations are correlated with evaluations on the AMP

Plot

ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  geom_smooth(method = "lm",
              color = viridis_pal(begin = 0.45, option = "mako")(1)) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 

ggplot(data_processed_after_exclusions, 
       aes(y = AMP_score,
           x = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  geom_smooth(method = "lm",
              color = viridis_pal(begin = 0.45, option = "mako")(1)) +
  ylab("AMP score") +
  xlab("Mean self-reported evaluation") +
  theme_linedraw() 

ggplot(data_processed_after_exclusions, 
       aes(x = AMP_score,
           y = mean_evaluation)) +
  geom_jitter(color = viridis_pal(begin = 0.45, option = "mako")(1),
              alpha = 0.5) +
  xlab("AMP score") +
  ylab("Mean self-reported evaluation") +
  theme_linedraw() 

More complex plots:

Axial hisograms

Scatter plots with axial histograms using ggExtra: https://cran.r-project.org/web/packages/ggExtra/vignettes/ggExtra.html

add axial histograms to a scatter plot. In a single plot, present different regression lines split by gender, and separate axial histograms for each gender.

facet_order <- c( "female", "male","nonbinary")
legend_labels <- c( "Female", "Male","Non-Binary")

dark2_colors <- brewer.pal(3, "Dark2")

axialhisto <- ggplot(data_processed_after_exclusions, 
                     aes(x = AMP_score,
                         y = mean_evaluation, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_colour_manual(name = "Gender", values = dark2_colors, breaks = facet_order, labels=legend_labels)+
  labs(title = "Distribution and relationship: Self report and AMP",x = "AMP score", y = "Mean self-reported evaluation")+
  theme_linedraw() 

scatterPlot_axialhisto<-ggMarginal(axialhisto, type = "histogram", groupColour=TRUE, groupFill=TRUE)
scatterPlot_axialhisto

#scatterPlot_axialdensity<-ggMarginal(axialhisto, type = "density", groupColour=TRUE, groupFill=TRUE)
#scatterPlot_axialdensity

Labelled points

Label points using ggrepel: https://cran.r-project.org/web/packages/ggrepel/vignettes/ggrepel.html

Label the points in a scatter plot using their participant codes. Label only the participants with more extreme scores.

labelledplot<-ggplot(data_processed_after_exclusions, 
                     aes(x = AMP_score,
                         y = mean_evaluation, label= subject))+
  geom_point(color = "#D95F02")+
  labs(title = "Relationship: Self report and AMP",x = "AMP score", y = "Mean self-reported evaluation")+
  theme_linedraw() 

labelledplotfinal <- labelledplot + geom_text_repel() + labs(title = "Relationship: Self report and AMP with labeled points")
labelledplotfinal

cat("Informsation from the cours about colors")
## Informsation from the cours about colors
colors <- viridis_pal(begin = 0.0, end = 1.0, option = "mako")(10)
show_col(colors)

Magnify areas

Magnify areas of your plot with ggmagnify: https://hughjonesd.github.io/ggmagnify/

Magnify an area of one of your scatter plots, eg where there are a lot of data points in a small area.

cat("I used geom_jitter to better show the effect of magnify areas (instead of geom_point)")
## I used geom_jitter to better show the effect of magnify areas (instead of geom_point)
from <- c(xmin = 0.50, xmax = 0.53, ymin = 0.85, ymax = 1.1)
to <- c(xmin = 0.0, xmax = 0.195, ymin = 3.5, ymax = 5.125)

scatterplot_magnify <- ggplot(data_processed_after_exclusions, aes(x = AMP_score, y = mean_evaluation)) +
    geom_jitter(color = "#D95F02")+
  labs(title = "Relationship: Self report and AMP with magnify areas",x = "AMP score", y = "Mean self-reported evaluation")+
  theme_linedraw() 

ggmagnifyplot<-scatterplot_magnify+ geom_magnify(from = from, to = to)+theme_linedraw()
ggmagnifyplot

Test

run an appropriate test. Below the output, interpret the results. Consider using easystats::report(), but you can do this manually if you prefer.

cortest<-cor.test(data_processed_after_exclusions$mean_evaluation, data_processed_after_exclusions$AMP_score, method = "spearman", exact=FALSE)

cat("From the plots it looks like only AMP scores are approx. normally distributed, whereas self-reported evaluations are not.\n  Therefore, instead of a pearson test,I used the spearman test to investigate the correlation between these two variables.\n  The p-value (0.2) is rather large, so we cannot reject the hypotheses that the two variables are not correlated.\n  The results suggest that both variables are not correlated.")
## From the plots it looks like only AMP scores are approx. normally distributed, whereas self-reported evaluations are not.
##   Therefore, instead of a pearson test,I used the spearman test to investigate the correlation between these two variables.
##   The p-value (0.2) is rather large, so we cannot reject the hypotheses that the two variables are not correlated.
##   The results suggest that both variables are not correlated.
report(cortest)
## Effect sizes were labelled following Funder's (2019) recommendations.
## 
## The Spearman's rank correlation rho between
## data_processed_after_exclusions$mean_evaluation and
## data_processed_after_exclusions$AMP_score is negative, statistically not
## significant, and small (rho = -0.13, S = 1.37e+05, p = 0.229)

Self-reported evalautions differ between men and women

Plot

split histogram, split violin plot, raincloud plot, etc.

facet_order <- c( "male", "female","nonbinary")
legend_labels <- c( "Female", "Female","Non-Binary")
dark2_colors <- brewer.pal(3, "Dark2")

# split histogram plot
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation, fill = gender)) +
  geom_histogram(binwidth = 0.2, color = "white", alpha = 0.8) +
  facet_wrap(~ gender, scales = "free") +
  labs(title = "Histogram of Mean self-reported evaluation by Gender with Free Scales", x = "Mean self-reported evaluation", y = "Frequency") +
  scale_fill_manual(values = dark2_colors) +
  guides(fill = "none") + 
  theme_bw()

ggplot(data_processed_after_exclusions, aes(x = mean_evaluation, fill = gender)) +
  geom_histogram(binwidth = 0.2, color = "white", alpha = 0.8) +
  facet_wrap(~ gender, scales = "fixed") +
  labs(title = "Histogram of Mean self-reported evaluation by Gender with Fixed Scales", x = "Mean self-reported evaluation", y = "Frequency") +
  scale_fill_manual(values = dark2_colors) +
  guides(fill = "none") + 
  theme_bw() +
  scale_y_continuous(breaks = seq(0, 30, by = 5))

cat("Adjusting the y-axis and background grid")
## Adjusting the y-axis and background grid
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation, fill = gender)) +
  geom_histogram(binwidth = 0.2, color = "white", alpha = 0.8) +
  facet_wrap(~ gender, scales = "fixed") +
  labs(title = "Histogram of Mean self-reported evaluation by Gender with Fixed Scales", x = "Mean self-reported evaluation", y = "Frequency") +
  scale_fill_manual(values = dark2_colors) +
  guides(fill = "none") + 
  theme_bw() +
  scale_y_continuous(breaks = seq(0, 32, by = 4))+
  geom_hline(yintercept = seq(0, 32, by = 2), color = "gray",  alpha = 0.5)

#violin plot
ggplot(data_processed_after_exclusions, aes(x = gender, y = mean_evaluation, fill = gender)) +
  geom_violin() +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order, labels=legend_labels)+
  labs(title = "Violin plot of Mean self-reported evaluation by Gender", x = "Gender", y = "Mean self-reported evaluation") +
  guides(fill = "none") + 
  theme_bw() 

#raincloud plot
ggplot(data_processed_after_exclusions, aes(x = gender, y = mean_evaluation, fill = gender)) + 
 stat_halfeye(adjust = 0.5,justification = -0.2, .width = 0,point_colour = NA) + 
  scale_fill_manual(values = dark2_colors) +
  labs(title = "Raincloud plot of Mean self-reported evaluation by Gender", x = "Gender", y = "Mean self-reported evaluation")+
  geom_boxplot(width = .1, outlier.shape = NA) +
  ggdist::stat_dots(side = "left", dotsize = .4, justification = 1.1,color="black", binwidth = .1)+
  theme_bw() +
  guides(fill = "none") + 
  coord_flip()

ggplot(data_processed_after_exclusions, aes(x = gender, y = mean_evaluation, fill = gender)) +
  geom_rain(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors) +
  labs(title = "Raincloud plot of Mean self-reported evaluation by Gender", x = "Gender", y = "Mean self-reported evaluation")+
  guides(fill = "none") + 
  coord_flip()+
  theme_bw() 

Test

run an appropriate test. Below the output, interpret the results: write a few sentences that report and interpret the results following APA reporting style.

cat("Assumption from plots: self reported evaluations are NOT normally distributed. Also the shapiro tests support this. Therefore, I used the Wilcoxon rank-sum test.") 
## Assumption from plots: self reported evaluations are NOT normally distributed. Also the shapiro tests support this. Therefore, I used the Wilcoxon rank-sum test.
print(shapiro.test(subset(data_processed_after_exclusions, gender=='male')$mean_evaluation))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(data_processed_after_exclusions, gender == "male")$mean_evaluation
## W = 0.78322, p-value = 0.0000005536
print(shapiro.test(subset(data_processed_after_exclusions, gender=='male')$mean_evaluation))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(data_processed_after_exclusions, gender == "male")$mean_evaluation
## W = 0.78322, p-value = 0.0000005536
# Wilcoxon rank-sum test
wilcox_test_result <- wilcox.test(mean_evaluation ~ gender, data = subset(data_processed_after_exclusions, gender!="nonbinary"), exact=FALSE)
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  mean_evaluation by gender
## W = 622.5, p-value = 0.002773
## alternative hypothesis: true location shift is not equal to 0
cat(" Wilcoxon rank-sum test was conducted to compare self-reported mean evaluation scores between male and female participants. The results revealed a statistically significant difference between males and females, ",italic("W")," = 622.5, ",italic("p"),"< 0.01. The median self-reported evaluation score was higher for males (",italic("M"),"=1.81) than for females (",italic("M"),"=1.32).")
##  Wilcoxon rank-sum test was conducted to compare self-reported mean evaluation scores between male and female participants. The results revealed a statistically significant difference between males and females,  W  = 622.5,  p < 0.01. The median self-reported evaluation score was higher for males ( M =1.81) than for females ( M =1.32).
mean_female <- data_processed_after_exclusions|>
  filter(gender == "female")|>
  summarize(mean_evaluation = mean(mean_evaluation)) 

mean_male <- data_processed_after_exclusions|>
  filter(gender == "male")|>
  summarize(mean_evaluation = mean(mean_evaluation)) 

# t-test, n>30: therefore, one could also use the t-test.
t_test_result_sr <- t.test(mean_evaluation ~ gender, data = subset(data_processed_after_exclusions, gender!="nonbinary"))
report(t_test_result_sr)
## Effect sizes were labelled following Cohen's (1988) recommendations.
## 
## The Welch Two Sample t-test testing the difference of mean_evaluation by gender
## (mean in group female = 1.32, mean in group male = 1.81) suggests that the
## effect is negative, statistically significant, and medium (difference = -0.49,
## 95% CI [-0.86, -0.12], t(83.14) = -2.62, p = 0.011; Cohen's d = -0.57, 95% CI
## [-1.01, -0.13])
cat(" T-test was conducted to compare self-reported mean evaluation scores between male and female participants. The results revealed a statistically significant difference between males and females, ",italic("t")," (83.14)= -2.62, ",italic("p"),"< 0.01. The median self-reported evaluation score was higher for males (",italic("M"),"=1.81) than for females (",italic("M"),"=1.32).")
##  T-test was conducted to compare self-reported mean evaluation scores between male and female participants. The results revealed a statistically significant difference between males and females,  t  (83.14)= -2.62,  p < 0.01. The median self-reported evaluation score was higher for males ( M =1.81) than for females ( M =1.32).

additional task: Alpha

#additional task: calculate the alphabet
alpha(subset(data_processed_after_exclusions, select = c(prefer, like, positive)), check.keys =TRUE)
## 
## Reliability analysis   
## Call: alpha(x = subset(data_processed_after_exclusions, select = c(prefer, 
##     like, positive)), check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean sd median_r
##       0.88      0.88    0.84      0.71 7.5 0.023  1.6  1     0.68
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.88  0.92
## Duhachek  0.83  0.88  0.92
## 
##  Reliability if an item is dropped:
##          raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## prefer        0.79      0.80    0.67      0.67 4.0    0.043    NA  0.67
## like          0.81      0.81    0.68      0.68 4.2    0.040    NA  0.68
## positive      0.88      0.88    0.79      0.79 7.7    0.024    NA  0.79
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean  sd
## prefer   90  0.91  0.92  0.87   0.80  1.7 1.1
## like     90  0.90  0.91  0.86   0.79  1.6 1.0
## positive 90  0.88  0.87  0.75   0.71  1.7 1.2
## 
## Non missing response frequency for each item
##             1    2    3    4    5    6    7 miss
## prefer   0.67 0.12 0.11 0.07 0.03 0.00 0.00    0
## like     0.67 0.18 0.10 0.02 0.02 0.01 0.00    0
## positive 0.68 0.16 0.07 0.07 0.00 0.02 0.01    0
c("additional task: alpha=0.876904")
## [1] "additional task: alpha=0.876904"

Evaluations on the Affect Misattribution Procedure differ between men and women

Plot

split histogram, split violin plot, raincloud plot, etc.

This time, vary the labeling and order of the legend, e.g., capitalise “Men” and “Women”, and know how to change the order of the factors.

#First order and labeling (first version)
facet_order_1 <- c( "female", "male","nonbinary")
facet_labels_1 <- c( "Female", "Male","Non-Binary")

#Second order and labeling (second version)
facet_order_2 <- c( "male", "female","nonbinary")
facet_labels_2 <- c( "male", "female","non-binary")

#Histogram with the first version of facet_order and facet_labels
cat("Histogram with the first version of facet_order and facet_labels")
## Histogram with the first version of facet_order and facet_labels
ggplot(data_processed_after_exclusions, aes(x = AMP_score, fill = factor(gender))) +
  geom_histogram(binwidth = 0.05, color = "white", alpha = 0.8) +
  coord_cartesian(xlim = c(min(data_processed_after_exclusions$AMP_score), max(data_processed_after_exclusions$AMP_score))) +
  facet_wrap(~ factor(gender, levels = facet_order), scales = "fixed") +
  labs(title = "Histogram of AMP score by Gender: Version 1", x = "AMP score", y = "Frequency") +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_1, labels=facet_labels_1)+
  theme_bw() 

#Histogram with the second version of facet_order and facet_labels
cat("Histogram with the second version of facet_order and facet_labels")
## Histogram with the second version of facet_order and facet_labels
ggplot(data_processed_after_exclusions, aes(x = AMP_score, fill = factor(gender))) +
  geom_histogram(binwidth = 0.05, color = "white", alpha = 0.8) +
  coord_cartesian(xlim = c(min(data_processed_after_exclusions$AMP_score), max(data_processed_after_exclusions$AMP_score))) +
  facet_wrap(~ factor(gender, levels = facet_order), scales = "fixed") +
  labs(title = "Histogram of AMP score by Gender: Version 2", x = "AMP score", y = "Frequency") +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_2, labels=facet_labels_2)+
  theme_bw()

#Violin plot with the first version of facet_order and facet_labels
cat("Violin plot with the first version of facet_order and facet_labels.")
## Violin plot with the first version of facet_order and facet_labels.
violin_AMP_1<-ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_violin(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_1, labels=facet_labels_1)+
  theme_bw()  +
  labs(title = "Violin plot of AMP score by Gender: Version 1", x = "Gender", y = "AMP score") 
violin_AMP_1

#Violin plot with the second version of facet_order and facet_labels
cat("Violin plot with the second version of facet_order and facet_labels.")
## Violin plot with the second version of facet_order and facet_labels.
violin_AMP_2<-ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_violin(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_2, labels=facet_labels_2)+
  theme_bw()  +
  labs(title = "Violin plot of AMP score by Gender: Version 2", x = "Gender", y = "AMP score") 
violin_AMP_2

#Split raincloud plot
ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_rain(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors) +
  labs(title = "Raincloud Plot of AMP score by Gender", x = "Gender", y = "AMP score") +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_2, labels=facet_labels_2)+
  scale_x_discrete(name = "Gender",labels = facet_labels_2) +
  theme_bw()

ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_rain(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors) +
  labs(title = "Raincloud Plot of AMP score by Gender", x = "Gender", y = "AMP score") +
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_1, labels=facet_labels_1)+
  scale_x_discrete(name = "Gender",labels = facet_labels_1) +
  coord_flip()+
  theme_bw()

#For the combined plot: Single plots without legend and without title
data_processed_after_exclusions$gender <- factor(data_processed_after_exclusions$gender, levels = facet_order_1)
facet_labels_custom <- c("female" = "Female", "male" = "Male ", "nonbinary"="Non-Binary")

histogramplot_AMP <- ggplot(data_processed_after_exclusions, aes(x = AMP_score, fill = gender)) +
  geom_histogram(binwidth = 0.05, color = "white", alpha = 0.8) +
  coord_cartesian(xlim = c(min(data_processed_after_exclusions$AMP_score), max(data_processed_after_exclusions$AMP_score)))+
  scale_x_continuous(breaks = c(0, 0.5, 1)) +
  labs(x = "AMP score", y = "Frequency") +
  facet_wrap(~ gender, scales = "fixed", labeller = as_labeller(facet_labels_custom))+
  scale_fill_manual(values = dark2_colors, name = "Gender", breaks = facet_order_1, labels = facet_order_1) +
  theme_bw() +
  guides(fill = FALSE)

# Violin plot
violin_AMP <- ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_violin(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors) +
  theme_bw() +
  guides(fill = "none") + 
  scale_x_discrete(name = "", labels = facet_labels_1)+
  labs(x = "", y = "AMP score")

# Raincloud plots 
raincloudplot_AMP_h<-ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_rain(alpha = 0.8) +
  scale_fill_manual(values = dark2_colors) +
  labs( x = "Gender", y = "AMP score") +
  scale_x_discrete(name = "gender",labels = facet_labels_1) +
  scale_x_discrete(name = "", labels = facet_labels_1)+
  theme_bw()+
  guides(fill = "none") 

raincloudplot_AMP_v <- ggplot(data_processed_after_exclusions, aes(x = gender, y = AMP_score, fill = gender)) +
  geom_rain(alpha = 0.8) +
  labs( x = "Gender", y = "AMP score") +
  scale_fill_manual(values = dark2_colors)+
  coord_flip()+
  guides(fill = "none") + 
  scale_x_discrete(name = "", labels = facet_labels_1)+
  theme_bw()

Test

run an appropriate test. Below the output, print an interpretation of the results generated by the ‘easystats’ package report. I.e., use report::report().

# Assumption from plots: AMP Score is approx. normally distributed, plus we have a sample size > 30, so that the t-test can be performed

# Using the t-test to see if AMP_score are different between men and woman

# t-test
t_test_result_amp <- t.test(AMP_score ~ gender, data = subset(data_processed_after_exclusions, gender!="nonbinary"))

report(t_test_result_amp)
## Effect sizes were labelled following Cohen's (1988) recommendations.
## 
## The Welch Two Sample t-test testing the difference of AMP_score by gender (mean
## in group female = 0.58, mean in group male = 0.58) suggests that the effect is
## positive, statistically not significant, and very small (difference = 3.16e-03,
## 95% CI [-0.08, 0.09], t(76.08) = 0.07, p = 0.942; Cohen's d = 0.02, 95% CI
## [-0.43, 0.47])
# Results suggest there is no significant difference in AMP scores between male and female

# Shapiro-Wilk Test for normal distribution (male)
print(shapiro.test(subset(data_processed_after_exclusions, gender=='male')$AMP_score))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(data_processed_after_exclusions, gender == "male")$AMP_score
## W = 0.90452, p-value = 0.0008824
# Shapiro-Wilk Test for normal distribution (female)
print(shapiro.test(subset(data_processed_after_exclusions, gender=='female')$AMP_score))
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(data_processed_after_exclusions, gender == "female")$AMP_score
## W = 0.89114, p-value = 0.001241
# Both tests confirm the non-normal distribution

# Wilcoxon rank-sum test
wilcox_test_result <- wilcox.test(AMP_score ~ gender, data = subset(data_processed_after_exclusions, gender!='nonbinary'), exact=FALSE)
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  AMP_score by gender
## W = 941, p-value = 0.9693
## alternative hypothesis: true location shift is not equal to 0
cat("The Wilcoxon test also confirm the results from the t-test. However, one can also use the t-test as n>30.")
## The Wilcoxon test also confirm the results from the t-test. However, one can also use the t-test as n>30.

Combining plots

Combine plots using the library patchwork.

Combine at least three of the above plots into one.

combineplot_h<-histogramplot_AMP/violin_AMP/raincloudplot_AMP_h
combineplot_v<-((histogramplot_AMP/ violin_AMP)|raincloudplot_AMP_v)+plot_annotation(title = 'Distribution of AMP score by gender')
#or
#combineplot<-grid.arrange(histogramplot_AMP_1,violin_AMP_1,raincloudplot_AMP, ncol = 1)
combineplot_v

combineplot_h

Saving plots

Save plots to disk with ggsave()

Save the above combined plot to disk as both .png and .pdf. Ensure the png has at least 300dpi resolution.

dir.create("../plot/")

ggsave("../plot/combineplot.png",dpi = 300,combineplot_v)
ggsave("../plot/combineplot.pdf",dpi = 300,combineplot_v)

Session info

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] crayon_1.5.2       ggmagnify_0.2.0    report_0.5.8       ggdist_3.3.1      
##  [5] ggrepel_0.9.4      ggExtra_0.10.1     psych_2.3.6        gridExtra_2.3     
##  [9] ggrain_0.0.3       ggridges_0.5.4     RColorBrewer_1.1-3 scales_1.2.1      
## [13] janitor_2.2.0      kableExtra_1.3.4   knitr_1.44         lubridate_1.9.2   
## [17] forcats_1.0.0      stringr_1.5.0      dplyr_1.1.3        purrr_1.0.2       
## [21] readr_2.1.4        tidyr_1.3.0        tibble_3.2.1       ggplot2_3.4.3     
## [25] tidyverse_2.0.0    patchwork_1.1.3   
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1         polynom_1.4-1        rlang_1.1.1         
##  [4] magrittr_2.0.3       snakecase_0.11.1     compiler_4.3.1      
##  [7] mgcv_1.8-42          systemfonts_1.0.4    vctrs_0.6.3         
## [10] quadprog_1.5-8       rvest_1.0.3          pkgconfig_2.0.3     
## [13] fastmap_1.1.1        ellipsis_0.3.2       labeling_0.4.3      
## [16] effectsize_0.8.6     utf8_1.2.3           promises_1.2.1      
## [19] rmarkdown_2.24       tzdb_0.4.0           ragg_1.2.5          
## [22] bit_4.0.5            xfun_0.40            cachem_1.0.8        
## [25] jsonlite_1.8.7       gghalves_0.1.4       highr_0.10          
## [28] later_1.3.1          parallel_4.3.1       R6_2.5.1            
## [31] bslib_0.5.1          stringi_1.7.12       jquerylib_0.1.4     
## [34] Rcpp_1.0.11          parameters_0.21.3    httpuv_1.6.11       
## [37] Matrix_1.6-1.1       splines_4.3.1        timechange_0.2.0    
## [40] tidyselect_1.2.0     rstudioapi_0.15.0    yaml_2.3.7          
## [43] miniUI_0.1.1.1       lattice_0.21-8       shiny_1.7.5         
## [46] withr_2.5.0          bayestestR_0.13.1    evaluate_0.21       
## [49] ggpp_0.5.5           xml2_1.3.5           pillar_1.9.0        
## [52] insight_0.19.7       distributional_0.3.2 generics_0.1.3      
## [55] vroom_1.6.3          hms_1.1.3            munsell_0.5.0       
## [58] xtable_1.8-4         glue_1.6.2           tools_4.3.1         
## [61] webshot_0.5.5        grid_4.3.1           datawizard_0.9.0    
## [64] colorspace_2.1-0     nlme_3.1-162         cli_3.6.1           
## [67] textshaping_0.3.6    fansi_1.0.4          viridisLite_0.4.2   
## [70] svglite_2.1.1        gtable_0.3.4         sass_0.4.7          
## [73] digest_0.6.33        farver_2.1.1         htmltools_0.5.6     
## [76] lifecycle_1.0.3      httr_1.4.7           mime_0.12           
## [79] bit64_4.0.5          MASS_7.3-60